R Markdown

# load("~/Desktop/Multi-Outcome Tree/Yao file/CancerData.RData")
setwd("~/Desktop/Multi-Outcome Tree R")
load("CancerData.RData")
source("Benefit_gain.R")
source("best.H.R")
source("MO_Tree_Chang.R")
source("predict_leaf.R")
source("Reg.mu.R")
source("Split.X.R")
source("visualization.R")
source("vis_tree.R")
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
Custom.color <- c("#CD2626","#79aec1","#2E8B57","#d6b55a")

Including Plots

library(randomForest)
## randomForest 4.7-1
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:dplyr':
## 
##     combine
## The following object is masked from 'package:ggplot2':
## 
##     margin
dat = CancerData.clean1
A = dat$`Treatment 1`
A = as.factor(A)
H = dat[,2:8]
H$`Def Loc Rx?` = as.numeric(H$`Def Loc Rx?`=="Yes")
H$Strat = as.numeric(H$Strat == "H")

# [1] "Age at Reg"                "Def Loc Rx?"              
# [3] "Time, Horm Rx to Reg (mo)" "Strat"                    
# [5] "Bsln PSA"                  "Alk Phos"                 
# [7] "Hgb" 
colnames(H) = paste("x",1:7,sep = "")
Y1 = dat$T1
Y2 = dat$E1
Y1 = (Y1=="No ET") * 0.5
Y2 = (Y2=="S") * 0.2
# Y1 = (Y1=="No ET")
# Y2 = (Y2=="S") 
Overall.Y = cbind(Y1,Y2)
class.A = sort(unique(A))
n = length(A)


k<-K<-length(class.A)
m<-N<-length(A)

label0 = paste(c("A","B","C","D"),":",class.A,sep="")

### LM with interaction

mus1.reg <- Reg.mu(Y = Y1, As = A, H = H)$mus.reg
mus2.reg <- Reg.mu(Y = Y2, As = A, H = H)$mus.reg

mu1.hat = mus1.reg
mu2.hat = mus2.reg

textsize = 12

### random forest
# RF1<-randomForest(Y1 ~., data=data.frame(A,H))
# RF2<-randomForest(Y2 ~., data=data.frame(A,H))

# 
# mus1.reg<-matrix(NA,n,length(class.A))
# for(i in 1L:length(class.A)){
#   mus1.reg[,i]<-predict(RF1,newdata=data.frame(A=rep(class.A[i],n),H))
# } 
# 
# mus2.reg<-matrix(NA,n,length(class.A))
# for(i in 1L:length(class.A)){
#   mus2.reg[,i]<-predict(RF2,newdata=data.frame(A=rep(class.A[i],n),H))
# }     
# 
# mu1.hat = mus1.reg
# mu2.hat = mus2.reg
# 
# for(ii in 1:5){
# plot(mu1.hat[ii,],mu2.hat[ii,])
# text(mu1.hat[ii,],mu2.hat[ii,],as.character(class.A))
# }      

### BART
# library(BayesTree)
# pred_X = rbind(data.frame(A=rep(class.A[1],n),H),
#                data.frame(A=rep(class.A[2],n),H),
#                data.frame(A=rep(class.A[3],n),H),
#                data.frame(A=rep(class.A[4],n),H))
#
#
# pred_T<- bart(
#   as.data.frame(cbind(A,H)),as.numeric(Y1),
#   x.test =  pred_X)
# muhat.vals <- colMeans(pred_T$yhat.test)
# mus1.reg <- matrix(muhat.vals, nrow=n,ncol=length(class.A))
#
# pred_T<- bart(
#   as.data.frame(cbind(A,H)),as.numeric(Y2),
#   x.test =  pred_X)
# muhat.vals <- colMeans(pred_T$yhat.test)
# mus2.reg <- matrix(muhat.vals, nrow=n,ncol=length(class.A))
#
# mu1.hat = mus1.reg
# mu2.hat = mus2.reg



### AIPTW

# source("~/Desktop/Multi-Outcome Tree/Yao file/MOTRL package/R/M.propen.R")
# source("~/Desktop/Multi-Outcome Tree/Yao file/MOTRL package/R/Reg.mu.R")
# library(nnet)
# 
# pis.hat <- M.propen(A = A,Xs = H)
# pis.hat[pis.hat<0.05] = 0.05



#AIPW estimates
# mus.a1<-matrix(NA,N,K)
# for(k in 1L:K){
#   mus.a1[,k] <- (A==class.A[k])*Y1/pis.hat[,k]+(1-(A==class.A[k])/pis.hat[,k])*mus1.reg[,k]
# }
# 
# #AIPW estimates
# mus.a2<-matrix(NA,N,K)
# for(k in 1L:K){
#   mus.a2[,k] <- (A==class.A[k])*Y2/pis.hat[,k]+(1-(A==class.A[k])/pis.hat[,k])*mus2.reg[,k]
# }

# mu1.hat = mus.a1
# mu2.hat = mus.a2
w0 = seq(0,1,0.05)

# treeout = DTRtree(mus.hat = list(mu1.hat,mu2.hat),
#         A,H,pis.hat=NULL,
#                   mus.reg=NULL,depth=3,
#                   minsplit=20,w_vec = cbind(w0,1-w0),weight_combine = "mean",lambda = 0.001)
  
treeout = DTRtree(mus.hat = list(mu1.hat,mu2.hat),
        A,H,pis.hat=NULL,
                  mus.reg=NULL,depth=3,
                  minsplit=20,w_vec = cbind(w0,1-w0),weight_combine = "max",lambda = 0.01)

treeout
##    node  X cutoff        mEy group
## 1     1  7   13.0 0.02578032    NA
## 2     2  2    0.0 0.03254700    NA
## 3     3  3   30.1 0.01971496    NA
## 4     4 NA     NA         NA     4
## 5     5 NA     NA         NA     5
## 6     6 NA     NA         NA     6
## 7     7 NA     NA         NA     7
## 8     8 NA     NA         NA    NA
## 9     9 NA     NA         NA    NA
## 10   10 NA     NA         NA    NA
## 11   11 NA     NA         NA    NA
## 12   12 NA     NA         NA    NA
## 13   13 NA     NA         NA    NA
## 14   14 NA     NA         NA    NA
## 15   15 NA     NA         NA    NA
newdata = H
(leaf_pred = predict_leaf.DTR(treeout, H))
##   [1] 4 6 4 7 6 6 6 6 6 6 7 7 4 7 7 7 6 6 6 5 5 6 5 4 5 7 5 5 4 5 7 5 7 5 5 7 7
##  [38] 5 4 7 7 4 4 5 5 7 4 6 7 6 5 6 4 5 6 4 5 6 7 6 4 6 5 4 6 7 5 7 5 6 6 5 5 4
##  [75] 6 4 5 7 7 7 7 7 5 6 7 5 6 4 5 5 4 4 5 6 7 5 4 4 5 6 6 6 5 6 5 6 5 7 5 7 7
## [112] 6 4 6 7 5 4 6 7 5 4 7 4 5 7 4 5 7 4 7 6 6 6 7 5 7 5 4 7 6 5 7 5 7
table(leaf_pred)
## leaf_pred
##  4  5  6  7 
## 27 41 37 39
leaf_n = table(leaf_pred)

tree0 = vis_tree(max_h = 3,treeout,number = table(leaf_pred))
plot(tree0)

Utility visualization

library(ggpubr)
library(ggplot2)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:randomForest':
## 
##     combine
## The following object is masked from 'package:dplyr':
## 
##     combine
library("cowplot")
## 
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggpubr':
## 
##     get_legend
library(grid)

p_list = list()
leaf_vec = sort(unique(leaf_pred))

n_leaf = length(leaf_vec)
n_record = rep(0,n_leaf)


for(ii in 1:n_leaf){
  chosen = which(leaf_pred==leaf_vec[ii])
  (p_list[[ii]] = my_vis(mu1.hat,mu2.hat,chosen,k=4,label=label0,lwd=1.5))
  n_record[ii] = sum(chosen)
}


# 2. Save the legend
legend <- get_legend(p_list[[1]])

# 3. Remove the legend from the box plot
for(ii in 1:n_leaf){
  p_list[[ii]] <- p_list[[ii]] + theme(legend.position="none") + 
    ggtitle(paste("Leaf Node",ii,", N=",leaf_n[ii]))
}

# 4. Create a blank plot
blankPlot <- ggplot()+geom_blank(aes(1,1)) + 
  cowplot::theme_nothing()
# pdf(file = "~/Desktop/Multi_Outcome/cluster/LZ_compare/LZ_assumption.pdf",
#     width = 17.4 / 2.54*1.5, # The width of the plot in inches
#     height = 17.4 / 2.54 * 0.75) # The height of the plot in inches
grid.arrange(p_list[[1]],p_list[[3]],
             legend,p_list[[2]],p_list[[4]], legend, ncol=3,
             widths=c(5, 5, 2),
             top=textGrob("Model Performance Under Different Weights",
                          gp=gpar(fontsize=18,fontface="bold")))

Point Visualization

p_list2 = list()
p_list3 = list()
leaf_vec = sort(unique(leaf_pred))
n_leaf = length(leaf_vec)


for(ii in 1:n_leaf){
  chosen = which(leaf_pred==leaf_vec[ii])
  (p_list2[[ii]] = my_vis_elipse(mu1.hat,mu2.hat,chosen,
                                 k=4,label=label0,lwd=3))
  (p_list3[[ii]] = my_vis_mean(mu1.hat,mu2.hat,chosen,
                                 k=4,label=label0,lwd=3))
}



# + xlim(-0.3,0.25) + ylim(-0.2,0.3)

plot4 = function(p_list){
  # 2. Save the legend
  legend <- get_legend(p_list[[1]])
  
  # 3. Remove the legend from the box plot
  for(ii in 1:n_leaf){
    p_list[[ii]] <- p_list[[ii]] + theme(legend.position="none") + 
      ggtitle(paste("Leaf Node",leaf_vec[ii],", N=",leaf_n[ii]))
  }
  p1 <- p_list[[1]] + theme(legend.position="none") 
  p2 <- p_list[[2]] + theme(legend.position="none") + xlim(-0.3,0.25) + ylim(-0.2,0.3)
  p3 <- p_list[[3]] + theme(legend.position="none")
  p4 <- p_list[[4]] + theme(legend.position="none")
  
  # 4. Create a blank plot
  blankPlot <- ggplot()+geom_blank(aes(1,1)) + 
    cowplot::theme_nothing()
  # pdf(file = "~/Desktop/Multi_Outcome/cluster/LZ_compare/LZ_assumption.pdf",
  #     width = 17.4 / 2.54*1.5, # The width of the plot in inches
  #     height = 17.4 / 2.54 * 0.75) # The height of the plot in inches
  p = grid.arrange(p1,p3,
               legend,p2,p4, legend, ncol=3,
               widths=c(5, 5, 2),
               top=textGrob("Different Leaf Node's Shape",
                            gp=gpar(fontsize=18,fontface="bold")))
  p
}

# triangle

plot_grid(plot4(p_list = p_list2))
## Warning: Removed 2 rows containing missing values (`geom_point()`).

plot_grid(plot4(p_list = p_list3))

Pairwise Distance

check_pair = function(i,j){
  chosen1 = (leaf_pred==leaf_vec[i])
  chosen2 = (leaf_pred==leaf_vec[j])
  (p = my_vis_pair(mu1.hat,mu2.hat,chosen1,chosen2,k=4,label=label0,lwd=1.5)+
      ggtitle(paste("Group",i,"v.s.","Group",j)))
p
}

q12=check_pair(1,2)+ theme(legend.position="none")+ ylab("Distance")
q13=check_pair(1,3)+ theme(legend.position="none")+ ylab("Distance")
q14=check_pair(1,4)+ theme(legend.position="none")+ ylab("Distance")
q23=check_pair(2,3)+ theme(legend.position="none")+ ylab("Distance")
q24=check_pair(2,4)+ theme(legend.position="none")+ ylab("Distance")
q34=check_pair(3,4)+ theme(legend.position="none")+ ylab("Distance")

Pairwise Mean

library(ggpattern)
require("magick")
## Loading required package: magick
## Linking to ImageMagick 6.9.12.3
## Enabled features: cairo, fontconfig, freetype, heic, lcms, pango, raw, rsvg, webp
## Disabled features: fftw, ghostscript, x11
library(magick)
label1 = c("A","B","C","D")
i = 1
j = 3


chosen1 = which(leaf_pred==leaf_vec[i])
chosen2 = which(leaf_pred==leaf_vec[j])
(p = my_vis_mean_pair(mu1.hat,mu2.hat,chosen1,chosen2,i,j,label1,k,point_size=5))

p2 = my_vis_all_pair(mu1.hat,mu2.hat,leaf_pred,label1,k,alpha = 1)
  
# 2. Save the legend
Total_legend <- get_legend(p2)

generate_mean_pair = function(i,j){
  chosen1 = which(leaf_pred==leaf_vec[i])
  chosen2 = which(leaf_pred==leaf_vec[j])
  (p = my_vis_mean_pair(mu1.hat,mu2.hat,chosen1,chosen2,i,j,label1,k,point_size=5)+
      ggtitle(paste("Group",i,"v.s.","Group",j)))
  return(p)
}

p12 = generate_mean_pair(1,2)+ theme(legend.position="none")
p13 = generate_mean_pair(1,3)+ theme(legend.position="none")
p14 = generate_mean_pair(1,4)+ theme(legend.position="none")
p23 = generate_mean_pair(2,3)+ theme(legend.position="none")
p24 = generate_mean_pair(2,4)+ theme(legend.position="none")
p34 = generate_mean_pair(3,4)+ theme(legend.position="none")
library(ggplot2)
library(gridExtra)
library(grid)

p = grid.arrange(p_list[[1]],p12,p13,p14,
             q12,p_list[[2]],p23,p24,
             q13,q23,p_list[[3]],p34,
             q14,q24,q34,p_list[[4]], ncol=4,
             widths=c(5, 5, 5,5),
             top=textGrob("Model Performance Under Different Weights",
                          gp=gpar(fontsize=18,fontface="bold")))

p <- plot_grid( p,Total_legend, ncol = 1, rel_heights = c(1, .2))

pdf(file = "diagnoal plot.pdf",   # The directory you want to save the file in
    width = 12, # The width of the plot in inches
    height = 9) # The height of the plot in inches
# Step 2: Create the plot with R code
p
# Step 3: Run dev.off() to create the file!
dev.off()
## quartz_off_screen 
##                 2